##*****************************************************************************
## MULTIPLE IMPUTATION
##*****************************************************************************
## 
## R Version: R version 3.2.4 (2016-03-1)
##

##LOADING PACKAGES
library(ggplot2) ##Version: 2.1.0
library(stringr) ##Version: 1.0.0
library(Amelia) ##Version 1.7.4
library(plyr) ##Version: 1.8.3

##SET WORKING DIRECTORY
setwd('~/workspace/fed_courts/scaling_paper/')

##LOADING DATASET
fjc <- read.csv('replication_materials/data/bonica_sen_fed_judges.csv')


##
##RESHAPING DATASET 
##

##RACE
fjc$black <- 0
fjc$hispanic <- 0
fjc$asian <- 0
fjc$white <- 0
fjc$black[grepl('african',fjc$race.or.ethnicity,ignore.case=T)] <- 1
fjc$hispanic[grepl('hispanic',fjc$race.or.ethnicity,ignore.case=T)] <- 1
fjc$asian[grepl('asian',fjc$race.or.ethnicity,ignore.case=T)] <- 1
fjc$white[grepl('white',fjc$race.or.ethnicity,ignore.case=T)] <- 1

##GENDER
fjc$female <- ifelse(fjc$gender == 'F',1,0)

##ABA RATING
fjc$well.qual  <- ifelse(fjc$aba.rating %in% c('Well Qualified','Exceptionally Well Qualified','Exceptionally Well Q'),1,0)


##NORMALIZING LAW SCHOOL NAMES
fjc$law.school <- tolower(as.character(fjc$law.school))
ls <- str_wrap(gsub('school of law|law school| law center| , warren g. harding| , claude w. pettit| , Paul M. Hebert Law Center|college of law| of Jurisprudence|university of|university|college|school','',fjc$law.school,ignore.case=T))
ls2 <- str_split_fixed(ls,'\\(now |\\)',3)
ls2 <- ifelse(ls2[,2]=='',ls2[,1],ls2[,2])
fjc$law.school <- ls2
aa <- aggregate(fjc$dime.cfscore,list(fjc$law.school),mean,na.rm=T)
mq2 <- match(fjc$law.school,aa[,1])
fjc$imp.ls.mean <- aa[mq2,2]
fjc$imp.ls.mean[is.na(fjc$imp.ls.mean)] <- NA

##EXTRACTING VARIABLES FROM EMPLOYMENT HISTORY
fjc$usatty=ifelse(grepl('u.s. attorney|united states attorney',fjc$employment.text.field,ignore.case=T),1,0)
fjc$judge=ifelse(grepl('judge|justice,',fjc$employment.text.field,ignore.case=T),1,0)
fjc$clerk=ifelse(grepl('clerk',fjc$employment.text.field,ignore.case=T),1,0)
fjc$trialatty=ifelse(grepl('trial attorney',fjc$employment.text.field,ignore.case=T),1,0)
fjc$military=ifelse(grepl('army|navy|air force|marines|naval',fjc$employment.text.field,ignore.case=T),1,0)
fjc$pub.def <- ifelse(grepl('defender',as.character(fjc$employment.text.field),ignore.case=T),1,0)
fjc$prosecutor <- ifelse(grepl('prosec|attorney general|state attorney|city attorney',
                               as.character(fjc$employment.text.field),ignore.case=T),1,0)
fjc$private.practice <- ifelse(grepl('private practice',as.character(fjc$employment.text.field),ignore.case=T),1,0)
fjc$prof <- ifelse(grepl('Dean/professor|profess|faculty|\\>Professor',as.character(fjc$employment.text.field),ignore.case=T),1,0)
fjc$adjunct <- ifelse(grepl('adjunct|visiting|instructor|lecturer',as.character(fjc$employment.text.field),ignore.case=T),1,0)
fjc$notadjunct <- ifelse(grepl('assistant professor|Associate professor',as.character(fjc$employment.text.field),ignore.case=T),1,0)
fjc$prof[fjc$adjunct ==1 & fjc$notadjunct==0] <- 0
fjc$failed.nom <- ifelse(grepl('nominated',as.character(fjc$employment.text.field),ignore.case=T),1,0)

##FACTORIZING APPOINTING PRESIDENT
fjc$pres <- factor(as.character(fjc$president.name))

##CREATING MATRIX OF STATES IN WHICH THE JUDGE WAS EMPLOYED
fjc$st <- factor(fjc$state)
smat <- matrix(0,nrow(fjc),length(state.name))
colnames(smat) <- state.name
for(s in state.name){
    smat[,s] <- ifelse(grepl(s,fjc$employment.text.field,ignore.case=T),1,0)
}
smat <- smat[,colSums(smat)>25]


##
##CONSTRUCTING DATA.FRAME FOR AMELIA
##
impvars <- c('dime.cfscore',
             'judge','military','usatty','clerk',
             'private.practice','prosecutor','pub.def','trialatty',
             'prof','adjunct',
             'failed.nom',
             'pres.dw','jud.chair.dw',
             'female',
             'state.delegation.dw',
             'state.delegation.dime.cfscore',
             'enter.year',
             'clerked.for.con',
             'clerked.for.lib',
             'senscore.dw',
             'senscore.dime.cfscore',
             'both.sen.opp',
             'court.type',
             'black','asian','white','hispanic',
             'birth.year',
             'well.qual',
             'pres',
             'fjc.judge.idno',
             'imp.ls.mean',
             'law.school'
             )
omitvars <- names(fjc)[!(names(fjc) %in% impvars)]
impdata <- cbind(fjc[, impvars],smat)

##ADDING IN INTERACTIONS
mm <- model.matrix(~-1 + female*(black+hispanic+asian),data=impdata)
impdata <- cbind(impdata[,!(colnames(impdata) %in% c(colnames(mm)))],mm)
impdata[,'both.sen.opp:jud.chair.dw'] <- with(impdata,both.sen.opp*jud.chair.dw)
impdata[,'both.sen.opp:senscore.dw'] <- with(impdata,both.sen.opp*senscore.dw)
impdata[,'both.sen.opp:pres.dw'] <- with(impdata,both.sen.opp*pres.dw)
impdata[,'both.sen.opp:state.delegation.dw'] <- with(impdata,both.sen.opp*state.delegation.dw)


##RESHAPING LAW SCHOOL DATA
tt <- table(impdata$law.school[!is.na(impdata$dime.cfscore)])
impdata$law.school <- ifelse(tt[match(impdata$law.school,names(tt))] < 13,'missing',as.character(impdata$law.school))
impdata$law.school[is.na(impdata$law.school)] <- 'missing'
impdata$law.school <- factor(impdata$law.school)

mm2 <- model.matrix(~-1 + law.school,data=impdata)
mm2 <- mm2[,colnames(mm2)!='law.schoolmissing']
mm2 <- mm2[match(1:nrow(impdata),rownames(mm2)),]
impdata <- cbind(impdata[,!(colnames(impdata) %in% c(colnames(mm2)))],mm2)
impdata <- impdata[,!(colnames(impdata) %in% c('law.school')),]

###############################################################################
##IMPUTATION STAGE:
##
##NOTE: THE MULTIPLE IMPUATATION MODEL RUN SEPARATELY ON APPOINTEES GROUPED BY PARTY
##OF THE APPOINTING PRESIDENT. SEE RELATED ARTICLE AND SUPPLEMENTAL APPENDIX
##FOR DETAILS.
###############################################################################

##SPECIFYING NUMBER OF ITERATIONS (SET TO N=25)
num.iters <- 25


##
##IMPUTATION:REPUBLICAN APPOINTEEES ONLY
##

##REMOVE VARIABLES WITH NO VARIATION
rml <- c('asian','female:asian')
imp.rep <- impdata[which(fjc$pres.dw > 0),!(colnames(impdata) %in% rml)]
set.seed(123)
a.out.rep <- amelia(imp.rep,m = num.iters,
                    noms = c('court.type'),
                    idvars=c('fjc.judge.idno'),
                    ords= c('birth.year'),
                    cs='pres',
                    parallel = c("no"),
                    autopri=.5,
                    ts='enter.year',
                    splinetime=2,
                    p2s=0
                    )
o.out.rep <- overimpute(a.out.rep,var='dime.cfscore')
print('Cor Rep:')
print(cor(o.out.rep[,2],o.out.rep[,3]))

##
##IMPUTATION:DEM APPOINTEES ONLY
##

##REMOVE VARIABLES WITH NO VARIATION
rml <- 'law.schoolarkansas'
imp.dem <- impdata[which(fjc$pres.dw < 0),!(colnames(impdata) %in% rml)]
set.seed(123)
a.out.dem <- amelia(imp.dem, m = num.iters,
                    noms = c('court.type'),
                    idvars=c('fjc.judge.idno'),
                    ords= c('birth.year'),
                    cs='pres',
                    parallel = c("no"),
                    ts='enter.year',
                    autopri=.5,
                    splinetime=2,
                    p2s=0
                    )
o.out.dem <- overimpute(a.out.dem,var='dime.cfscore')
print('Cor Dem:')
print(cor(o.out.dem[,2],o.out.dem[,3]))

##
##IMPUTATION: ALL APPOINTEES 
##
set.seed(123)
a.out.all <- amelia(impdata, m = num.iters,
                    noms = c('court.type'),
                    idvars=c('fjc.judge.idno'),
                    ords= c('birth.year'),
                    cs='pres',
                    parallel = c("no"),
                    autopri=.5,
                    splinetime=2,
                    ts='enter.year',
                    p2s=0
                    )
o.out.all <- overimpute(a.out.all,var='dime.cfscore')
print('Cor All:')
print(cor(o.out.all[,2],o.out.all[,3]))

impdata$overimputed.dime.all <- NA
impdata$overimputed.dime.all[o.out.all[,1]] <- o.out.all[,3]
m <- match(fjc$fjc.judge.idno,impdata$fjc.judge.idno)
fjc$overimputed.dime.all <-impdata$overimputed.dime.all[m]


##
##COMBINING RESULTS FROM PARTY SPECIFIC IMPUTATION MODELS
##
keep <- sapply(a.out.dem$imputations,function(z){length(z)}) > 1
imp.dem$imputed.cf <- rowMeans(sapply(a.out.dem$imputations[keep],function(z) return(z$dime.cfscore)) )
keep <- sapply(a.out.rep$imputations,function(z){length(z)}) > 1
imp.rep$imputed.cf <- rowMeans(sapply(a.out.rep$imputations,function(z) return(z$dime.cfscore)) )

o.out <- rbind(o.out.rep,o.out.dem)
print(cor(o.out[,2],o.out[,3]))
imp.dem$overimputed.dime <- NA
imp.rep$overimputed.dime <- NA
imp.dem$overimputed.dime[o.out.dem[,1]] = o.out.dem[,3]
imp.rep$overimputed.dime[o.out.rep[,1]] = o.out.rep[,3]
imp.all <- rbind.fill(imp.dem,imp.rep)

m <- match(fjc$fjc.judge.idno,imp.all$fjc.judge.idno)
fjc$overimputed.dime.cfscore <-imp.all$overimputed.dime[m]
fjc$imputed.cf <-  imp.all$imputed.cf[m]

###############################################################################
##FIGURE A3
###############################################################################
gg <- rbind(cbind(fjc$overimputed.dime.cfscore[!is.na(fjc$dime.cfscore)],'Overimputed (Donors)'),
            cbind(fjc$imputed.cf[is.na(fjc$dime.cfscore)],'Imputed (Non-Donors)'),
            cbind(fjc$dime.cfscore[!is.na(fjc$dime.cfscore)],'Observed Values (Donors)'))
gg <- as.data.frame(gg)
colnames(gg) <- c('x','type')
gg[,1] <- as.numeric(as.character(gg[,1]))
q <- qplot(data=gg,x=x,color=type,linetype=type,geom='density')
q <- q + theme_bw()
q <- q + scale_linetype('')
q <- q + scale_color_grey(guide=FALSE,'')
q <- q + xlab('Contributor DIME Score')
q <- q + theme(legend.position=c(.9,.9))
q
pdf(file='replication_materials/figures/figure_A3.pdf',width=12, height=7)
print(q)
dev.off()


###############################################################################
##FIGURE A4
###############################################################################
library(leiv)

##RESCALING JCS SCORES -- ERRORS-IN-VARIABLES REGRESSION MODEL 
lm.out <- leiv(dime.cfscore~jcs.score.dw,data=fjc,abs.tol=1e-10)
fjc$jcs.score.dw <- lm.out@intercept + lm.out@slope*fjc$jcs.score.dw

##SCATTER PLOT MATRIX
panel.cor <- function(x, y,col=col, digits = 2, prefix = "r = ", cex.cor, ...){

    prefix <- ''
    usr <- par("usr"); on.exit(par(usr))
    par(usr = c(0, 1, 0, 1))
    r <- round(abs(cor(x, y,use='complete.obs')),2)
    txt <- format(c(r, 0.123456789), digits = digits)[1]
    txt <- paste0('All: ',prefix, txt)
    rdem <- round(abs(cor(x[col=='black'], y[col=='black'],use='complete.obs')),2)
    txtdem <- format(c(rdem, 0.123456789), digits = digits)[1]
    txtdem <- paste0('Dem: ',prefix, txtdem)
    rrep <- round(abs(cor(x[col=='darkgrey'], y[col=='darkgrey'],use='complete.obs')),2)
    txtrep <- format(c(rrep, 0.123456789), digits = digits)[1]
    txtrep <- paste0('Rep: ',prefix, txtrep)
    if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt)
    text(0.5, 0.7, txt, cex = 2)
    text(0.5, 0.5, txtdem, cex = 2)##,col='black')
    text(0.5, 0.3, txtrep, cex = 2)##,col='darkgrey')
}

panel.hist <- function(x, ...) {
    usr <- par("usr")
    par(new = TRUE,c(usr[1:2], 0, 1.5))
    yul <- max(max(density(na.omit(x[pcol=='darkgrey']),bw = "SJ",adjust=2)$y),
               max(density(na.omit(x[pcol=='black']),bw = "SJ",adjust=2)$y))
    pc <- factor(pcol)
    densityPlot(x~pc,
                adjust=2,
                rug=FALSE,
                grid=FALSE,
                lty=c(1,1),
                legend.location = NA,
                ylim=c(0,yul*1.5),
                xaxt='n',yaxt='n',
                col=c('blue','red'))

}

fjc$pcol  <- ifelse(fjc$party.affiliation.of.president=='Democratic','black','darkgrey')
fjc$pletter<- ifelse(fjc$party.affiliation.of.president=='Democratic','D','R')
syear <- 1980
use <-  fjc$enter.year >= syear
 
use <- !duplicated(fjc$fjc.judge.idno)
cmat <- data.frame(fjc$dime.cfscore,
                   fjc$overimputed.dime.cfscore,
                   fjc$overimputed.dime.all)[use,]
pcol <- fjc$pcol[use]
pletter <- fjc$pletter[use]
colnames(cmat) <- c('Contributor DIME scores',
                    'Overimputed Values (By Party)',
                    'Overimputed Values (Pooled)')

pdf(file='replication_materials/figures/figure_A4.pdf',width=10, height=10)
print(pairs(cmat,col=pcol,
            xlim=c(-1.60,1.60),ylim=c(-1.60,1.60),
            upper.panel = panel.cor,
            pch=pletter,alpha=.4))
dev.off()


